import urllib.request as ur
import lxml.etree as le
import user_agent

# CLI inputs: the search keyword and the inclusive page range to crawl.
# Prompts are in Chinese: "enter keyword" / "start page" / "end page".
keyword = input('请输入关键词:')
pn_start = int(input('起始页:'))
pn_end = int(input('终止页:'))

def getRequest(url):
    """Return a urllib Request for *url* carrying a randomized desktop
    User-Agent header (helps avoid trivial bot blocking)."""
    headers = {'User-Agent': user_agent.get_user_agent_pc()}
    return ur.Request(url=url, headers=headers)

def getProxyOpener():
    """Fetch a fresh proxy address from the data5u API and return an
    opener that routes HTTP requests through it.

    NOTE(review): this performs a network call on every invocation —
    one new proxy per request is presumably intentional rotation.
    """
    api = 'http://api.ip.data5u.com/dynamic/get.html?order=4aaa41c97b16e692bbf317bcfadcaca3&sep=3'
    address = ur.urlopen(api).read().decode('utf-8').strip()
    handler = ur.ProxyHandler({'http': address})
    return ur.build_opener(handler)


import os

# Crawl each search-result page, then download and save every matched
# blog post to blog/<title>.html.
os.makedirs('blog', exist_ok=True)  # original crashed if blog/ was missing
for pn in range(pn_start, pn_end + 1):
    request = getRequest(
        'https://so.csdn.net/so/search/s.do?p=%s&q=%s&t=blog&domain=&o=&s=&u=&l=&f=&rbg=0' % (pn, keyword)
    )
    try:
        response = getProxyOpener().open(request).read()
        href_s = le.HTML(response).xpath('//span[@class="down fr"]/../span[@class="link"]/a/@href')
        for href in href_s:
            try:
                response_blog = getProxyOpener().open(
                    getRequest(href)
                ).read()
                # xpath() returns a list of text nodes; take the first one.
                # (Bug fix: the original used the whole list, producing
                # filenames like blog/['title'].html.)
                titles = le.HTML(response_blog).xpath('//h1[@class="title-article"]/text()')
                if not titles:
                    # layout changed or the request was blocked — skip
                    continue
                title = titles[0]
                print(title)
                with open('blog/%s.html' % title, 'wb') as f:
                    f.write(response_blog)
            except Exception as e:
                print(e)
    except Exception as e:
        # Report page-level failures instead of silently swallowing
        # everything (the original bare `except: pass` also ate
        # KeyboardInterrupt/SystemExit).
        print(e)
#
#
#
# import urllib.request as ur
# import lxml.etree as le
# import user_agent
# from tool import CookiePool
#
# def getRequest(url):
#     return ur.Request(
#         url=url,
#         headers={
#             'User-Agent':user_agent.get_user_agent_pc(),
#             'Cookie':cookiePool.get_cookie()
#         }
#     )
#
# def getProxyOpener():
#     proxy_address = ur.urlopen('http://api.ip.data5u.com/dynamic/get.html?order=4aaa41c97b16e692bbf317bcfadcaca3&sep=3').read().decode('utf-8').strip()
#     proxy_handler = ur.ProxyHandler(
#         {
#             'http':proxy_address
#         }
#     )
#     return ur.build_opener(proxy_handler)
#
#
# if __name__ == '__main__':
#     keyword = input('请输入关键词:')
#     pn_start = int(input('起始页:'))
#     pn_end = int(input('终止页:'))
#
#     cookiePool = CookiePool(
#         urls=[
#             'https://blog.csdn.net/kzl_knight/article/details/103187223',
#             'https://blog.csdn.net/kzl_knight/article/details/103182569',
#         ])
#     cookiePool.run().start()
#
#     for pn in range(pn_start, pn_end + 1):
#         request = getRequest(
#             'https://so.csdn.net/so/search/s.do?p=%s&q=%s&t=blog&domain=&o=&s=&u=&l=&f=&rbg=0' % (pn, keyword)
#         )
#         try:
#             response = getProxyOpener().open(request).read()
#             href_s = le.HTML(response).xpath('//span[@class="down fr"]/../span[@class="link"]/a/@href')
#             for href in href_s:
#                 try:
#                     response_blog = getProxyOpener().open(
#                         getRequest(href)
#                     ).read()
#                     title = le.HTML(response_blog).xpath('//h1[@class="title-article"]/text()')[0]
#                     print(title)
#                     with open('blog/%s.html' % title, 'wb') as f:
#                         f.write(response_blog)
#                 except Exception as e:
#                     print(e)
#         except:
#             pass
#
#